import random
from datetime import timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as ply
import seaborn as sns
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import calmap
import folium
# Hex colours used by the plots below.
# NOTE(review): names presumably stand for confirmed / deaths / recovered / active.
cnf = '#393e46'
dth = '#ff2e63'
rec = '#21bf73'
act = '#fe9801'
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import warnings
warnings.filterwarnings('ignore')
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)
# Read the county-level table; the 'Date' column is parsed into datetimes.
full_table = pd.read_csv(
    'C:/Users/rsingh/usa_county_wise.csv',
    parse_dates=['Date'],
)
full_table.sample(6)

# Active is derived here as confirmed minus deaths.
full_table['Active'] = full_table['Confirmed'] - full_table['Deaths']

# Missing state names become empty strings, missing counts become zero.
full_table = full_table.fillna(
    value={'Province_State': '', 'Confirmed': 0, 'Deaths': 0, 'Active': 0}
)
full_table.sample(6)
# Grouped by day, State
# =======================
# Sum the rows of full_table into one row per (Date, Province_State).
# Columns are selected with a list: tuple-style selection on a GroupBy
# (e.g. ['Confirmed', 'Deaths', 'Active'] without the inner brackets)
# raises in pandas >= 2.0.
full_grouped = (
    full_table.groupby(['Date', 'Province_State'])[['Confirmed', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)

# new cases ======================================================
# Day-over-day change of the cumulative counts, taken within each state so
# that the first date of a state is NaN instead of a difference against the
# last row of the preceding state.
temp = (
    full_grouped.groupby(['Province_State', 'Date'])[['Confirmed', 'Deaths']]
    .sum()
    .groupby(level='Province_State')
    .diff()
    .reset_index()
)
# renaming columns
temp.columns = ['Province_State', 'Date', 'New cases', 'New deaths']
# =================================================================

# Attach the daily deltas to the cumulative table.
full_grouped = pd.merge(full_grouped, temp, on=['Province_State', 'Date'])

# The first date of every state has no previous day; treat its delta as 0.
full_grouped = full_grouped.fillna(0)

# fixing data types
cols = ['New cases', 'New deaths']
full_grouped[cols] = full_grouped[cols].astype('int')

# A drop in the cumulative confirmed count is clamped to 0 new cases.
# Only 'New cases' is clamped; 'New deaths' is left as computed.
full_grouped['New cases'] = full_grouped['New cases'].clip(lower=0)
full_grouped.head()
# Day wise
# ========
# One row per date with totals summed over all states.
# List-based column selection is required on a GroupBy in pandas >= 2.0.
day_wise = (
    full_grouped.groupby('Date')[['Confirmed', 'Deaths', 'Active', 'New cases']]
    .sum()
    .reset_index()
)

# deaths per 100 confirmed cases
day_wise['Deaths / 100 Cases'] = (
    (day_wise['Deaths'] / day_wise['Confirmed']) * 100
).round(2)

# no. of states with at least one non-zero confirmed count on each date.
# The counts are aligned to day_wise by Date rather than assigned by position,
# so a date on which no state has any confirmed case gets 0 instead of
# causing a length mismatch.
states_per_day = (
    full_grouped[full_grouped['Confirmed'] != 0]
    .groupby('Date')['Province_State']
    .nunique()
)
day_wise['No. of States'] = (
    day_wise['Date'].map(states_per_day).fillna(0).astype('int64')
)

# 0 / 0 yields NaN above; report it as 0
cols = ['Deaths / 100 Cases']
day_wise[cols] = day_wise[cols].fillna(0)
day_wise.head()
# State wise
# ============
# Keep only the rows of the most recent date in full_grouped.
state_wise = (
    full_grouped[full_grouped['Date'] == full_grouped['Date'].max()]
    .reset_index(drop=True)
    .drop('Date', axis=1)
)

# One row per state. Columns are selected with a list because tuple-style
# selection on a GroupBy raises in pandas >= 2.0.
state_wise = (
    state_wise.groupby('Province_State')[['Confirmed', 'Deaths', 'Active', 'New cases']]
    .sum()
    .reset_index()
)

# deaths per 100 confirmed cases; 0 / 0 yields NaN and is reported as 0
state_wise['Deaths / 100 Cases'] = (
    (state_wise['Deaths'] / state_wise['Confirmed']) * 100
).round(2)
cols = ['Deaths / 100 Cases']
state_wise[cols] = state_wise[cols].fillna(0)

# Separate view ordered by confirmed count, largest first.
state_wise_sort = state_wise.sort_values('Confirmed', ascending=False)
state_wise_sort.head()
# Read the population table and keep only its first two columns, renamed so
# the first one can serve as the join key with state_wise.
# NOTE(review): assumes those columns are state name and population — confirm
# against the CSV.
pop = pd.read_csv('C:/Users/rsingh/usa_population_by_state_2020.csv').iloc[:, :2]
pop.columns = ['Province_State', 'Population']

# Left join: every row of state_wise is kept, with NaN population when the
# state has no match in the population table.
state_wise = state_wise.merge(pop, how='left', on='Province_State')

# Confirmed cases per 1000 people, rounded to zero decimals.
state_wise['Cases / 1000 People'] = (
    (state_wise['Confirmed'] / state_wise['Population']) * 1000
).round()
state_wise.head()
# Confirmed counts per state on the latest date and exactly seven days earlier.
latest_date = full_grouped['Date'].max()
today = (
    full_grouped[full_grouped['Date'] == latest_date]
    .reset_index(drop=True)
    .drop('Date', axis=1)[['Province_State', 'Confirmed']]
)
last_week = (
    full_grouped[full_grouped['Date'] == latest_date - timedelta(days=7)]
    .reset_index(drop=True)
    .drop('Date', axis=1)[['Province_State', 'Confirmed']]
)

# Side-by-side comparison; only states present on both dates survive the
# (inner) merge.
temp = pd.merge(today, last_week, on='Province_State', suffixes=(' today', ' last week'))
temp['1 week change'] = temp['Confirmed today'] - temp['Confirmed last week']
temp = temp[['Province_State', 'Confirmed last week', '1 week change']]

state_wise = pd.merge(state_wise, temp, on='Province_State')

# Percentage growth over the week. A state with 0 confirmed cases last week
# has no defined percentage: the division gives +/-inf, which is mapped to NaN
# so it does not distort later sorting or plotting.
pct_increase = state_wise['1 week change'] / state_wise['Confirmed last week'] * 100
state_wise['1 week % increase'] = (
    pct_increase.replace([np.inf, -np.inf], np.nan).round(2)
)

# NOTE(review): state_wise is not re-sorted here; assign the result of
# state_wise.sort_values('Deaths') if a Deaths ordering is wanted downstream.
state_wise.head()
# Totals per date, then keep only the most recent date.
# List-based column selection is required on a GroupBy in pandas >= 2.0.
temp = full_table.groupby('Date')[['Confirmed', 'Deaths', 'Active']].sum().reset_index()
temp = temp[temp['Date'] == temp['Date'].max()].reset_index(drop=True)

# Long format: one row per category ('Active', 'Deaths') with its total.
tm = temp.melt(id_vars="Date", value_vars=['Active', 'Deaths'])

# Two tiles are drawn, so exactly two colours are supplied (active, deaths);
# the previous three-colour list put the 'rec' colour on one of the tiles.
fig = px.treemap(
    tm,
    path=["variable"],
    values="value",
    height=300,
    width=1000,
    color_discrete_sequence=[act, dth],
)
fig.data[0].textinfo = 'label+text+value'
fig.show()
# Daily totals of deaths and active cases.
# List-based column selection is required on a GroupBy in pandas >= 2.0.
temp = full_table.groupby('Date')[['Deaths', 'Active']].sum().reset_index()

# Long format: one row per (Date, Case) with its Count, 'Deaths' rows first.
temp = temp.melt(
    id_vars="Date",
    value_vars=['Deaths', 'Active'],
    var_name='Case',
    value_name='Count',
)
temp.head()

# Colours are listed in the same order as the categories appear in 'Case'
# (Deaths, then Active) so each series gets its matching palette colour.
fig = px.area(
    temp,
    x="Date",
    y="Count",
    color='Case',
    height=400,
    title='Cases in USA over time',
    color_discrete_sequence=[dth, act],
)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
# Animated map of confirmed counts, one frame per date.
# The colour uses log1p (log(1 + x)) so that rows with 0 confirmed cases map
# to 0 instead of -inf, which a plain log would produce.
# NOTE(review): locationmode='USA-states' expects 'iso2' to hold two-letter
# state codes, and full_table is passed unaggregated — confirm both against
# the CSV.
fig = px.choropleth(
    full_table,
    locations="iso2",
    locationmode='USA-states',
    color=np.log1p(full_table["Confirmed"]),
    hover_name="iso2",
    animation_frame=full_table["Date"].dt.strftime('%Y-%m-%d'),
    title='Cases over time',
    color_continuous_scale=px.colors.sequential.Magenta,
)
# Hide the colour bar: its ticks would show log-scaled values.
fig.update(layout_coloraxis_showscale=False)
fig.show()